#if !defined ( SFERA_DISABLE_OPENCL ) 
#include "renderer/ocl/kernels/kernels.h"
string KernelSource_kernel_core = 
"/***************************************************************************\n"
" *   Copyright (C) 1998-2010 by authors (see AUTHORS.txt)                  *\n"
" *                                                                         *\n"
" *   This file is part of Sfera.                                           *\n"
" *                                                                         *\n"
" *   Sfera is free software; you can redistribute it and/or modify         *\n"
" *   it under the terms of the GNU General Public License as published by  *\n"
" *   the Free Software Foundation; either version 3 of the License, or     *\n"
" *   (at your option) any later version.                                   *\n"
" *                                                                         *\n"
" *   Sfera is distributed in the hope that it will be useful,              *\n"
" *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *\n"
" *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *\n"
" *   GNU General Public License for more details.                          *\n"
" *                                                                         *\n"
" *   You should have received a copy of the GNU General Public License     *\n"
" *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *\n"
" *                                                                         *\n"
" ***************************************************************************/\n"
"\n"
"// List of symbols defined at compile time:\n"
"//  PARAM_SCREEN_WIDTH\n"
"//  PARAM_SCREEN_HEIGHT\n"
"//  PARAM_SCREEN_SAMPLEPERPASS\n"
"//  PARAM_RAY_EPSILON\n"
"//  PARAM_MAX_DIFFUSE_BOUNCE\n"
"//  PARAM_MAX_SPECULARGLOSSY_BOUNCE\n"
"//  PARAM_IL_SHIFT_U\n"
"//  PARAM_IL_SHIFT_V\n"
"//  PARAM_IL_GAIN_R\n"
"//  PARAM_IL_GAIN_G\n"
"//  PARAM_IL_GAIN_B\n"
"//  PARAM_IL_MAP_WIDTH\n"
"//  PARAM_IL_MAP_HEIGHT\n"
"//  PARAM_ENABLE_MAT_MATTE\n"
"//  PARAM_ENABLE_MAT_MIRROR\n"
"//  PARAM_ENABLE_MAT_GLASS\n"
"//  PARAM_ENABLE_MAT_METAL\n"
"//  PARAM_ENABLE_MAT_ALLOY\n"
"//  PARAM_HAS_TEXTUREMAPS\n"
"//  PARAM_HAS_BUMPMAPS\n"
"//  PARAM_GAMMA\n"
"//  PARAM_TM_LINEAR_SCALE\n"
"//  PARAM_MEM_TYPE\n"
"\n"
"//#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
"\n"
// Float-literal fallbacks for pi, 1/pi and 1/(2*pi). Each one is defined only
// when the kernel compiler has not already supplied a macro with that name.
// NOTE(review): if the OpenCL platform predefines M_PI, that definition (not
// the float literal below) is the one used by the kernel code - confirm the
// resulting type is acceptable on the target devices.
"#ifndef M_PI\n"
"#define M_PI 3.14159265358979323846f\n"
"#endif\n"
"\n"
"#ifndef INV_PI\n"
"#define INV_PI  0.31830988618379067154f\n"
"#endif\n"
"\n"
"#ifndef INV_TWOPI\n"
"#define INV_TWOPI  0.15915494309189533577f\n"
"#endif\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// Types\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Pair of floats named u and v (presumably texture coordinates; not referenced
// by the kernel code visible in this file).
"typedef struct {\n"
"	float u, v;\n"
"} UV;\n"
"\n"
// RGB triple used for colors, texels, throughput and radiance.
"typedef struct {\n"
"	float r, g, b;\n"
"} Spectrum;\n"
"\n"
// 3D position.
"typedef struct {\n"
"	float x, y, z;\n"
"} Point;\n"
"\n"
// 3D direction / displacement.
"typedef struct {\n"
"	float x, y, z;\n"
"} Vector;\n"
"\n"
// Ray with origin o and direction d. Sphere_IntersectP only accepts hit
// distances t with mint < t < maxt.
"typedef struct {\n"
"	Point o;\n"
"	Vector d;\n"
"	float mint, maxt;\n"
"} Ray;\n"
"\n"
// Sphere described by its center and radius.
"typedef struct {\n"
"	Point center;\n"
"	float rad;\n"
"} Sphere;\n"
"\n"
// One entry of the BVH array walked by BVH_Intersect.
//  bsphere:        sphere tested against the ray for this entry.
//  primitiveIndex: 0xffffffffu when the entry is not a primitive; otherwise the
//                  index reported to the caller on a hit.
//  skipIndex:      array index the traversal jumps to when bsphere is missed;
//                  the root's skipIndex is used as the end-of-array marker.
"typedef struct {\n"
"	Sphere bsphere;\n"
"	unsigned int primitiveIndex;\n"
"	unsigned int skipIndex;\n"
"} BVHAccelArrayNode;\n"
"\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Random number generator state: the three words advanced by RndUintValue.
"typedef struct {\n"
"	unsigned int s1, s2, s3;\n"
"} Seed;\n"
"\n"
// Per-work-item record, indexed by global id. It currently holds only the RNG
// state, which the Init kernel writes and the PathTracing kernel reads.
"typedef struct {\n"
"	// The task seed\n"
"	Seed seed;\n"
"} GPUTask;\n"
"\n"
// Frame buffer element; identical to Spectrum.
"typedef Spectrum Pixel;\n"
"\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Camera parameters consumed by GenerateCameraRay.
//  yon, hither:           used to derive the initial ray->maxt.
//  rasterToCameraMatrix,
//  cameraToWorldMatrix:   4x4 matrices indexed [row][column] and applied to
//                         (x, y, z, 1) column vectors.
//  lensRadius, focalDistance: not read by GenerateCameraRay in this file.
// NOTE(review): the layout presumably has to match the host-side structure -
// confirm before reordering fields.
"typedef struct {\n"
"	float lensRadius;\n"
"	float focalDistance;\n"
"	float yon, hither;\n"
"\n"
"	float rasterToCameraMatrix[4][4];\n"
"	float cameraToWorldMatrix[4][4];\n"
"} Camera;\n"
"\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Descriptor of one texture map: rgbOffset is the index of its first texel in
// the shared texel array, width/height are its dimensions in texels.
"typedef struct {\n"
"	unsigned int rgbOffset;\n"
"	unsigned int width, height;\n"
"} TexMap;\n"
"\n"
// Per-sphere texture binding. texMapIndex selects a TexMap (0xffffffffu is
// treated as 'no texture' by the PathTracing kernel); the spherical
// coordinates of the hit are multiplied by scaleU/scaleV and then offset by
// shiftU/shiftV before the lookup.
"typedef struct {\n"
"	unsigned int texMapIndex;\n"
"	float shiftU, shiftV;\n"
"	float scaleU, scaleV;\n"
"} TexMapInstance;\n"
"\n"
// Per-sphere bump map binding. Same fields as TexMapInstance plus 'scale',
// which multiplies the finite differences used to perturb the shading normal.
"typedef struct {\n"
"	unsigned int texMapIndex;\n"
"	float shiftU, shiftV;\n"
"	float scaleU, scaleV;\n"
"	float scale;\n"
"} BumpMapInstance;\n"
"\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Values of Material.type; each one selects the matching member of
// Material.param and the matching *_Sample_f function.
"#define MAT_MATTE 0\n"
"#define MAT_MIRROR 1\n"
"#define MAT_GLASS 2\n"
"#define MAT_METAL 3\n"
"#define MAT_ALLOY 4\n"
"\n"
// Matte: RGB factor applied by Matte_Sample_f.
"typedef struct {\n"
"    float r, g, b;\n"
"} MatteParam;\n"
"\n"
// Mirror: RGB factor applied to the mirrored direction by Mirror_Sample_f.
"typedef struct {\n"
"    float r, g, b;\n"
"} MirrorParam;\n"
"\n"
// Glass: RGB factors for the reflected and the refracted direction, the two
// indices of refraction ('ousideIor' [sic] and 'ior') and the R0 term of the
// Fresnel approximation evaluated in Glass_Sample_f.
"typedef struct {\n"
"    float refl_r, refl_g, refl_b;\n"
"    float refrct_r, refrct_g, refrct_b;\n"
"    float ousideIor, ior;\n"
"    float R0;\n"
"} GlassParam;\n"
"\n"
// Metal: RGB factor and the exponent handed to GlossyReflection.
"typedef struct {\n"
"    float r, g, b;\n"
"    float exponent;\n"
"} MetalParam;\n"
"\n"
// Alloy: RGB factors for the diffuse and the glossy lobe, the exponent handed
// to GlossyReflection and the R0 term used to choose between the two lobes.
"typedef struct {\n"
"    float diff_r, diff_g, diff_b;\n"
"    float refl_r, refl_g, refl_b;\n"
"    float exponent;\n"
"    float R0;\n"
"} AlloyParam;\n"
"\n"
// Material record: 'type' is one of the MAT_* tags, emi_* is the emission the
// PathTracing kernel adds (scaled by throughput) at every hit, and 'param'
// holds the parameters of the material selected by 'type'.
"typedef struct {\n"
"	unsigned int type;\n"
"	float emi_r, emi_g, emi_b;\n"
"\n"
"	union {\n"
"		MatteParam matte;\n"
"		MirrorParam mirror;\n"
"        GlassParam glass;\n"
"        MetalParam metal;\n"
"        AlloyParam alloy;\n"
"	} param;\n"
"} Material;\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// Random number generator\n"
"// maximally equidistributed combined Tausworthe generator\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Mask keeping the low 24 bits of a random word; see RndFloatValue.
"#define FLOATMASK 0x00ffffffu\n"
"\n"
// One update step of a single generator component:
//   ((s & c) << d) ^ (((s << a) ^ s) >> b)
// s is the current state word; a, b, d are shift amounts and c is a bit mask.
"uint TAUSWORTHE(const uint s, const uint a,\n"
"	const uint b, const uint c,\n"
"	const uint d) {\n"
"	return ((s&c)<<d) ^ (((s << a) ^ s) >> b);\n"
"}\n"
"\n"
// Multiplies x by 69069 in uint arithmetic; InitRandomGenerator uses it to
// derive the three state words from one seed value.
"uint LCG(const uint x) { return x * 69069; }\n"
"\n"
// Returns x unchanged when x >= m, otherwise x + m.
"uint ValidSeed(const uint x, const uint m) {\n"
"	return (x < m) ? (x + m) : x;\n"
"}\n"
"\n"
// Fills *s from a single seed value.
//  seed: any value; 0 is replaced by 0xffffff before use.
//  s:    receives the three state words. Each word is produced by LCG from the
//        previous one and passed through ValidSeed with thresholds 1, 7 and 15.
"void InitRandomGenerator(uint seed, Seed *s) {\n"
"	// Avoid 0 value\n"
"	seed = (seed == 0) ? (seed + 0xffffffu) : seed;\n"
"\n"
"	s->s1 = ValidSeed(LCG(seed), 1);\n"
"	s->s2 = ValidSeed(LCG(s->s1), 7);\n"
"	s->s3 = ValidSeed(LCG(s->s2), 15);\n"
"}\n"
"\n"
// Advances all three state words of *s with TAUSWORTHE and returns their XOR.
// The returned value is the XOR of three 'unsigned int' words even though the
// declared return type is 'unsigned long'.
"unsigned long RndUintValue(Seed *s) {\n"
"	s->s1 = TAUSWORTHE(s->s1, 13, 19, 4294967294UL, 12);\n"
"	s->s2 = TAUSWORTHE(s->s2, 2, 25, 4294967288UL, 4);\n"
"	s->s3 = TAUSWORTHE(s->s3, 3, 11, 4294967280UL, 17);\n"
"\n"
"	return ((s->s1) ^ (s->s2) ^ (s->s3));\n"
"}\n"
"\n"
// Returns the next random float: the low 24 bits of RndUintValue scaled by
// 1 / (FLOATMASK + 1), i.e. a value in [0, 1). Advances *s.
"float RndFloatValue(Seed *s) {\n"
"	return (RndUintValue(s) & FLOATMASK) * (1.f / (FLOATMASK + 1UL));\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Weighted sum of the channels: 0.212671 * r + 0.715160 * g + 0.072169 * b.
"float Spectrum_Y(const Spectrum *s) {\n"
"	return 0.212671f * s->r + 0.715160f * s->g + 0.072169f * s->b;\n"
"}\n"
"\n"
// Largest of the three channels; the bump mapping code of the PathTracing
// kernel uses it to turn a texel into a scalar height.
"float Spectrum_Filter(const Spectrum *s) {\n"
"	return max(max(s->r, s->g), s->b);\n"
"}\n"
"\n"
// Dot product of *v0 and *v1.
"float Dot(const Vector *v0, const Vector *v1) {\n"
"	return v0->x * v1->x + v0->y * v1->y + v0->z * v1->z;\n"
"}\n"
"\n"
// Scales *v in place to unit length. There is no guard for a zero-length
// input: the reciprocal length is then not finite.
"void Normalize(Vector *v) {\n"
"	const float il = 1.f / sqrt(Dot(v, v));\n"
"\n"
"	v->x *= il;\n"
"	v->y *= il;\n"
"	v->z *= il;\n"
"}\n"
"\n"
// Cross product: *v3 = *v1 x *v2.
// All three components are computed before any of them is stored, so the
// result is also correct when v3 points at the same Vector as v1 or v2.
"void Cross(Vector *v3, const Vector *v1, const Vector *v2) {\n"
"	const float cx = (v1->y * v2->z) - (v1->z * v2->y);\n"
"	const float cy = (v1->z * v2->x) - (v1->x * v2->z);\n"
"	const float cz = (v1->x * v2->y) - (v1->y * v2->x);\n"
"\n"
"	v3->x = cx;\n"
"	v3->y = cy;\n"
"	v3->z = cz;\n"
"}\n"
"\n"
// Remainder of a / b shifted into the non-negative range when the plain '%'
// result is negative. A divisor of 0 is replaced by 1, so the result is then 0.
"int Mod(int a, int b) {\n"
"	if (b == 0)\n"
"		b = 1;\n"
"\n"
"	a %= b;\n"
"	if (a < 0)\n"
"		a += b;\n"
"\n"
"	return a;\n"
"}\n"
"\n"
// Linear interpolation: returns v1 for t == 0 and v2 for t == 1.
"float Lerp(float t, float v1, float v2) {\n"
"	return (1.f - t) * v1 + t * v2;\n"
"}\n"
"\n"
// Maps the sample (u1, u2) to a point (*dx, *dy) on the unit disk.
// The sample is first moved to [-1, 1]^2; the square is then split into four
// wedge-shaped regions, the radius is taken from the dominant coordinate and
// the angle (in units of pi/4) from the ratio of the two coordinates. The
// centre of the square maps to (0, 0).
// The backslash of the TeX comment inside the kernel text is escaped so the
// host compiler does not turn it into a TAB character.
"void ConcentricSampleDisk(const float u1, const float u2, float *dx, float *dy) {\n"
"	float r, theta;\n"
"	// Map uniform random numbers to $[-1,1]^2$\n"
"	float sx = 2.f * u1 - 1.f;\n"
"	float sy = 2.f * u2 - 1.f;\n"
"	// Map square to $(r,\\theta)$\n"
"	// Handle degeneracy at the origin\n"
"	if (sx == 0.f && sy == 0.f) {\n"
"		*dx = 0.f;\n"
"		*dy = 0.f;\n"
"		return;\n"
"	}\n"
"	if (sx >= -sy) {\n"
"		if (sx > sy) {\n"
"			// Handle first region of disk\n"
"			r = sx;\n"
"			if (sy > 0.f)\n"
"				theta = sy / r;\n"
"			else\n"
"				theta = 8.f + sy / r;\n"
"		} else {\n"
"			// Handle second region of disk\n"
"			r = sy;\n"
"			theta = 2.f - sx / r;\n"
"		}\n"
"	} else {\n"
"		if (sx <= sy) {\n"
"			// Handle third region of disk\n"
"			r = -sx;\n"
"			theta = 4.f - sy / r;\n"
"		} else {\n"
"			// Handle fourth region of disk\n"
"			r = -sy;\n"
"			theta = 6.f + sx / r;\n"
"		}\n"
"	}\n"
"	theta *= M_PI / 4.f;\n"
"	*dx = r * cos(theta);\n"
"	*dy = r * sin(theta);\n"
"}\n"
"\n"
// Writes to *ret a direction on the +z hemisphere: x and y come from
// ConcentricSampleDisk and z is sqrt(max(0, 1 - x*x - y*y)).
"void CosineSampleHemisphere(Vector *ret, const float u1, const float u2) {\n"
"	ConcentricSampleDisk(u1, u2, &ret->x, &ret->y);\n"
"	ret->z = sqrt(max(0.f, 1.f - ret->x * ret->x - ret->y * ret->y));\n"
"}\n"
"\n"
// Writes to *ret a direction inside a cone around *z.
//  u1:          selects cos(theta) by interpolating between costhetamax and 1.
//  u2:          selects the azimuth phi = 2 * pi * u2.
//  costhetamax: cosine of the cone half angle.
//  x, y, z:     frame the local direction is expressed in (z is the cone axis).
// Not called by any code visible in this file.
"void UniformSampleCone(Vector *ret, const float u1, const float u2, const float costhetamax,\n"
"	const Vector *x, const Vector *y, const Vector *z) {\n"
"	const float costheta = Lerp(u1, costhetamax, 1.f);\n"
"	const float sintheta = sqrt(1.f - costheta * costheta);\n"
"	const float phi = u2 * 2.f * M_PI;\n"
"\n"
"	const float kx = cos(phi) * sintheta;\n"
"	const float ky = sin(phi) * sintheta;\n"
"	const float kz = costheta;\n"
"\n"
"	ret->x = kx * x->x + ky * y->x + kz * z->x;\n"
"	ret->y = kx * x->y + ky * y->y + kz * z->y;\n"
"	ret->z = kx * x->z + ky * y->z + kz * z->z;\n"
"}\n"
"\n"
// Returns 1 / (2 * pi * (1 - costhetamax)). The denominator is zero when
// costhetamax == 1; no guard is applied.
"float UniformConePdf(float costhetamax) {\n"
"	return 1.f / (2.f * M_PI * (1.f - costhetamax));\n"
"}\n"
"\n"
// Builds two vectors perpendicular to *v1.
//  v2: obtained by zeroing the smaller (in magnitude) of v1's x / y components,
//      swapping the remaining component with z, negating one of the two and
//      normalizing.
//  v3: Cross(v1, v2).
// v2 and v3 are unit length only if *v1 is - callers in this file pass
// normalized normals.
"void CoordinateSystem(const Vector *v1, Vector *v2, Vector *v3) {\n"
"	if (fabs(v1->x) > fabs(v1->y)) {\n"
"		float invLen = 1.f / sqrt(v1->x * v1->x + v1->z * v1->z);\n"
"		v2->x = -v1->z * invLen;\n"
"		v2->y = 0.f;\n"
"		v2->z = v1->x * invLen;\n"
"	} else {\n"
"		float invLen = 1.f / sqrt(v1->y * v1->y + v1->z * v1->z);\n"
"		v2->x = 0.f;\n"
"		v2->y = v1->z * invLen;\n"
"		v2->z = -v1->y * invLen;\n"
"	}\n"
"\n"
"	Cross(v3, v1, v2);\n"
"}\n"
"\n"
// Polar angle of *v measured from +z: acos of v->z clamped to [-1, 1].
"float SphericalTheta(const Vector *v) {\n"
"	return acos(clamp(v->z, -1.f, 1.f));\n"
"}\n"
"\n"
// Azimuth of *v in the xy plane: atan2(y, x) with negative results shifted up
// by 2 * pi.
"float SphericalPhi(const Vector *v) {\n"
"	float p = atan2(v->y, v->x);\n"
"	return (p < 0.f) ? p + 2.f * M_PI : p;\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// Texture maps\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Copies the texel at integer coordinates (s, t) into *col.
// The coordinates are wrapped with Mod, so out-of-range and negative values
// address the repeated image; texels are stored row by row (index = v * width + u).
"void TexMap_GetTexel(__global Spectrum *pixels, const uint width, const uint height,\n"
"		const int s, const int t, Spectrum *col) {\n"
"	const uint u = Mod(s, width);\n"
"	const uint v = Mod(t, height);\n"
"\n"
"	const unsigned index = v * width + u;\n"
"\n"
"	col->r = pixels[index].r;\n"
"	col->g = pixels[index].g;\n"
"	col->b = pixels[index].b;\n"
"}\n"
"\n"
// Bilinearly filtered lookup at the coordinates (u, v) into *col.
// (u, v) are scaled by the map size and shifted by half a texel; the four
// surrounding texels are fetched with TexMap_GetTexel (which wraps) and blended
// with weights derived from the fractional parts.
"void TexMap_GetColor(__global Spectrum *pixels, const uint width, const uint height,\n"
"		const float u, const float v, Spectrum *col) {\n"
"	const float s = u * width - 0.5f;\n"
"	const float t = v * height - 0.5f;\n"
"\n"
"	const int s0 = (int)floor(s);\n"
"	const int t0 = (int)floor(t);\n"
"\n"
"	const float ds = s - s0;\n"
"	const float dt = t - t0;\n"
"\n"
"	const float ids = 1.f - ds;\n"
"	const float idt = 1.f - dt;\n"
"\n"
"	Spectrum c0, c1, c2, c3;\n"
"	TexMap_GetTexel(pixels, width, height, s0, t0, &c0);\n"
"	TexMap_GetTexel(pixels, width, height, s0, t0 + 1, &c1);\n"
"	TexMap_GetTexel(pixels, width, height, s0 + 1, t0, &c2);\n"
"	TexMap_GetTexel(pixels, width, height, s0 + 1, t0 + 1, &c3);\n"
"\n"
"	const float k0 = ids * idt;\n"
"	const float k1 = ids * dt;\n"
"	const float k2 = ds * idt;\n"
"	const float k3 = ds * dt;\n"
"\n"
"	col->r = k0 * c0.r + k1 * c1.r + k2 * c2.r + k3 * c3.r;\n"
"	col->g = k0 * c0.g + k1 * c1.g + k2 * c2.g + k3 * c3.g;\n"
"	col->b = k0 * c0.b + k1 * c1.b + k2 * c2.b + k3 * c3.b;\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// InfiniteLight_Le\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Looks up the environment map in direction *dir and writes the result to *le.
// u is 1 - phi / (2 * pi) and v is theta / pi, each offset by the compile-time
// PARAM_IL_SHIFT_U / PARAM_IL_SHIFT_V; the filtered texel is then multiplied
// per channel by PARAM_IL_GAIN_R/G/B.
"void InfiniteLight_Le(__global Spectrum *infiniteLightMap, Spectrum *le, const Vector *dir) {\n"
"	const float u = 1.f - SphericalPhi(dir) * INV_TWOPI +  PARAM_IL_SHIFT_U;\n"
"	const float v = SphericalTheta(dir) * INV_PI + PARAM_IL_SHIFT_V;\n"
"\n"
"	TexMap_GetColor(infiniteLightMap, PARAM_IL_MAP_WIDTH, PARAM_IL_MAP_HEIGHT, u, v, le);\n"
"\n"
"	le->r *= PARAM_IL_GAIN_R;\n"
"	le->g *= PARAM_IL_GAIN_G;\n"
"	le->b *= PARAM_IL_GAIN_B;\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// GenerateCameraRay\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Builds the primary ray for one pixel.
//  seed:       RNG state; two values are drawn to jitter the sample position.
//  camera:     matrices and clip distances (see Camera).
//  pixelIndex: linear pixel index; x = index % width, y = index / width.
//  ray:        receives the world-space ray.
// Steps: the jittered raster position (y flipped) is transformed by
// rasterToCameraMatrix with a homogeneous divide; the normalized camera-space
// point is used as the camera-space direction; the point (with divide) and the
// direction (upper-left 3x3 only) are then transformed by cameraToWorldMatrix.
// mint is PARAM_RAY_EPSILON and maxt is (yon - hither) / dir.z, using the
// camera-space direction. lensRadius and focalDistance are not used here.
"void GenerateCameraRay(\n"
"		Seed *seed,\n"
"		PARAM_MEM_TYPE Camera *camera,\n"
"		const uint pixelIndex,\n"
"		Ray *ray) {\n"
"	const float scrSampleX = RndFloatValue(seed);\n"
"	const float scrSampleY = RndFloatValue(seed);\n"
"\n"
"	const float screenX = pixelIndex % PARAM_SCREEN_WIDTH + scrSampleX - .5f;\n"
"	const float screenY = pixelIndex / PARAM_SCREEN_WIDTH + scrSampleY - .5f;\n"
"\n"
"	Point Pras;\n"
"	Pras.x = screenX;\n"
"	Pras.y = PARAM_SCREEN_HEIGHT - screenY - 1.f;\n"
"	Pras.z = 0;\n"
"\n"
"	Point orig;\n"
"	// RasterToCamera(Pras, &orig);\n"
"\n"
"	const float iw = 1.f / (camera->rasterToCameraMatrix[3][0] * Pras.x + camera->rasterToCameraMatrix[3][1] * Pras.y + camera->rasterToCameraMatrix[3][2] * Pras.z + camera->rasterToCameraMatrix[3][3]);\n"
"	orig.x = (camera->rasterToCameraMatrix[0][0] * Pras.x + camera->rasterToCameraMatrix[0][1] * Pras.y + camera->rasterToCameraMatrix[0][2] * Pras.z + camera->rasterToCameraMatrix[0][3]) * iw;\n"
"	orig.y = (camera->rasterToCameraMatrix[1][0] * Pras.x + camera->rasterToCameraMatrix[1][1] * Pras.y + camera->rasterToCameraMatrix[1][2] * Pras.z + camera->rasterToCameraMatrix[1][3]) * iw;\n"
"	orig.z = (camera->rasterToCameraMatrix[2][0] * Pras.x + camera->rasterToCameraMatrix[2][1] * Pras.y + camera->rasterToCameraMatrix[2][2] * Pras.z + camera->rasterToCameraMatrix[2][3]) * iw;\n"
"\n"
"	Vector dir;\n"
"	dir.x = orig.x;\n"
"	dir.y = orig.y;\n"
"	dir.z = orig.z;\n"
"\n"
"	const float hither = camera->hither;\n"
"\n"
"	Normalize(&dir);\n"
"\n"
"	// CameraToWorld(*ray, ray);\n"
"	Point torig;\n"
"	const float iw2 = 1.f / (camera->cameraToWorldMatrix[3][0] * orig.x + camera->cameraToWorldMatrix[3][1] * orig.y + camera->cameraToWorldMatrix[3][2] * orig.z + camera->cameraToWorldMatrix[3][3]);\n"
"	torig.x = (camera->cameraToWorldMatrix[0][0] * orig.x + camera->cameraToWorldMatrix[0][1] * orig.y + camera->cameraToWorldMatrix[0][2] * orig.z + camera->cameraToWorldMatrix[0][3]) * iw2;\n"
"	torig.y = (camera->cameraToWorldMatrix[1][0] * orig.x + camera->cameraToWorldMatrix[1][1] * orig.y + camera->cameraToWorldMatrix[1][2] * orig.z + camera->cameraToWorldMatrix[1][3]) * iw2;\n"
"	torig.z = (camera->cameraToWorldMatrix[2][0] * orig.x + camera->cameraToWorldMatrix[2][1] * orig.y + camera->cameraToWorldMatrix[2][2] * orig.z + camera->cameraToWorldMatrix[2][3]) * iw2;\n"
"\n"
"	Vector tdir;\n"
"	tdir.x = camera->cameraToWorldMatrix[0][0] * dir.x + camera->cameraToWorldMatrix[0][1] * dir.y + camera->cameraToWorldMatrix[0][2] * dir.z;\n"
"	tdir.y = camera->cameraToWorldMatrix[1][0] * dir.x + camera->cameraToWorldMatrix[1][1] * dir.y + camera->cameraToWorldMatrix[1][2] * dir.z;\n"
"	tdir.z = camera->cameraToWorldMatrix[2][0] * dir.x + camera->cameraToWorldMatrix[2][1] * dir.y + camera->cameraToWorldMatrix[2][2] * dir.z;\n"
"\n"
"	ray->o = torig;\n"
"	ray->d = tdir;\n"
"	ray->mint = PARAM_RAY_EPSILON;\n"
"	ray->maxt = (camera->yon - hither) / dir.z;\n"
"\n"
"	/*printf(\"(%f, %f, %f) (%f, %f, %f) [%f, %f]\\n\",\n"
"		ray->o.x, ray->o.y, ray->o.z, ray->d.x, ray->d.y, ray->d.z,\n"
"		ray->mint, ray->maxt);*/\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// BVH intersect\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Tests *ray against the sphere stored in *bvhNode.
// Returns false when the ray's supporting line misses the sphere (negative
// discriminant). Otherwise returns true and sets *hitT to the nearer root that
// lies strictly inside (ray->mint, ray->maxt), then the farther one, or to
// INFINITY when neither root is inside that interval - so 'true' does not by
// itself mean a usable hit distance.
// The quadratic is written for a unit-length ray->d.
"bool Sphere_IntersectP(PARAM_MEM_TYPE BVHAccelArrayNode *bvhNode, const Ray *ray, float *hitT) {\n"
"	const Point center = bvhNode->bsphere.center;\n"
"	const float rad = bvhNode->bsphere.rad;\n"
"\n"
"	Vector op;\n"
"	op.x = center.x - ray->o.x;\n"
"	op.y = center.y - ray->o.y;\n"
"	op.z = center.z - ray->o.z;\n"
"	const float b = Dot(&op, &ray->d);\n"
"\n"
"	float det = b * b - Dot(&op, &op) + rad * rad;\n"
"	if (det < 0.f)\n"
"		return false;\n"
"	else\n"
"		det = sqrt(det);\n"
"\n"
"	float t = b - det;\n"
"	if ((t > ray->mint) && ((t < ray->maxt)))\n"
"		*hitT = t;\n"
"	else {\n"
"		t = b + det;\n"
"\n"
"		if ((t > ray->mint) && ((t < ray->maxt)))\n"
"			*hitT = t;\n"
"		else\n"
"			*hitT = INFINITY;\n"
"	}\n"
"\n"
"	return true;\n"
"}\n"
"\n"
// Finds the closest primitive sphere hit by *ray.
//  ray:            in/out; ray->maxt is shortened to the closest hit distance.
//  hitSphere:      receives a pointer to the hit sphere; written only on a hit.
//  primitiveIndex: receives the primitive index, or 0xffffffffu when nothing
//                  was hit.
//  bvhTree:        node array; entry 0 is the root and its skipIndex marks the
//                  end of the traversal.
// Returns true when a primitive was hit.
// Traversal: when an entry's sphere test succeeds the walk continues with the
// next array entry, otherwise it jumps to the entry's skipIndex.
"bool BVH_Intersect(\n"
"		Ray *ray,\n"
"		PARAM_MEM_TYPE Sphere **hitSphere,\n"
"		uint *primitiveIndex,\n"
"		PARAM_MEM_TYPE BVHAccelArrayNode *bvhTree) {\n"
"	unsigned int currentNode = 0; // Root Node\n"
"	unsigned int stopNode = bvhTree[0].skipIndex; // Non-existent\n"
"	*primitiveIndex = 0xffffffffu;\n"
"\n"
"	while (currentNode < stopNode) {\n"
"		float hitT;\n"
"		if (Sphere_IntersectP(&bvhTree[currentNode], ray, &hitT)) {\n"
"			if ((bvhTree[currentNode].primitiveIndex != 0xffffffffu) && (hitT < ray->maxt)){\n"
"				ray->maxt = hitT;\n"
"				*hitSphere = &bvhTree[currentNode].bsphere;\n"
"				*primitiveIndex = bvhTree[currentNode].primitiveIndex;\n"
"				// Continue testing for closer intersections\n"
"			}\n"
"\n"
"			currentNode++;\n"
"		} else\n"
"			currentNode = bvhTree[currentNode].skipIndex;\n"
"	}\n"
"\n"
"	return (*primitiveIndex) != 0xffffffffu;\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// Materials\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Samples the matte material.
//  mat:           RGB factor of the material.
//  wo:            outgoing direction (unused here).
//  wi:            receives the sampled direction around shadeN.
//  pdf:           receives INV_PI, or 0 when the sample is rejected.
//  f:             receives the material color times the local cosine dir.z.
//  shadeN:        shading normal used as the hemisphere axis.
//  seed:          RNG state; two values are drawn.
//  diffuseBounce: set to true.
// When the sample is rejected (*pdf == 0) wi, f and diffuseBounce are left
// unwritten; the caller must check *pdf first.
"void Matte_Sample_f(const PARAM_MEM_TYPE MatteParam *mat, const Vector *wo, Vector *wi,\n"
"		float *pdf, Spectrum *f, const Vector *shadeN,\n"
"		Seed *seed,\n"
"		bool *diffuseBounce) {\n"
"	Vector dir;\n"
"	CosineSampleHemisphere(&dir, RndFloatValue(seed), RndFloatValue(seed));\n"
"	const float dp = dir.z;\n"
"	// Using 0.0001 instead of 0.0 to cut down fireflies\n"
"	if (dp <= 0.0001f) {\n"
"		*pdf = 0.f;\n"
"		return;\n"
"	}\n"
"\n"
"	*pdf = INV_PI;\n"
"\n"
"	Vector v1, v2;\n"
"	CoordinateSystem(shadeN, &v1, &v2);\n"
"\n"
"	wi->x = v1.x * dir.x + v2.x * dir.y + shadeN->x * dir.z;\n"
"	wi->y = v1.y * dir.x + v2.y * dir.y + shadeN->y * dir.z;\n"
"	wi->z = v1.z * dir.x + v2.z * dir.y + shadeN->z * dir.z;\n"
"\n"
"	f->r = mat->r * dp;\n"
"	f->g = mat->g * dp;\n"
"	f->b = mat->b * dp;\n"
"\n"
"	*diffuseBounce = true;\n"
"}\n"
"\n"
// Samples the mirror material: *wi is *wo reflected about shadeN, *pdf is 1,
// *f is the material color and *diffuseBounce is false.
"void Mirror_Sample_f(const PARAM_MEM_TYPE MirrorParam *mat, const Vector *wo, Vector *wi,\n"
"		float *pdf, Spectrum *f, const Vector *shadeN,\n"
"		bool *diffuseBounce) {\n"
"    const float k = 2.f * Dot(shadeN, wo);\n"
"	wi->x = k * shadeN->x - wo->x;\n"
"	wi->y = k * shadeN->y - wo->y;\n"
"	wi->z = k * shadeN->z - wo->z;\n"
"\n"
"	*pdf = 1.f;\n"
"\n"
"	f->r = mat->r;\n"
"	f->g = mat->g;\n"
"	f->b = mat->b;\n"
"\n"
"	*diffuseBounce = false;\n"
"}\n"
"\n"
// Samples the glass material: chooses between mirror reflection about N and
// refraction through the surface.
//  mat:           reflection / refraction colors, indices of refraction, R0.
//  wo:            outgoing direction.
//  wi:            receives the reflected or refracted direction.
//  pdf:           receives the value the caller tests against 0.
//  f:             receives the color factor; in the two random branches it is
//                 already divided by *pdf.
//  N:             geometric normal.
//  shadeN:        shading normal, flipped by the caller to face the ray; 'into'
//                 is true when N and shadeN point the same way.
//  seed:          RNG state; one value is drawn only when both reflection and
//                 transmission are possible.
//  diffuseBounce: always set to false.
// Re is the reflectance R0 + (1 - R0) * c^5 and Tr = 1 - Re; reflection is
// chosen with probability P = .25 + .5 * Re. When Tr == 0 and Re == 0 only
// *pdf (= 0) is written.
"void Glass_Sample_f(const PARAM_MEM_TYPE GlassParam *mat,\n"
"    const Vector *wo, Vector *wi, float *pdf, Spectrum *f, const Vector *N, const Vector *shadeN,\n"
"    Seed *seed,\n"
"	bool *diffuseBounce) {\n"
"    Vector reflDir;\n"
"    const float k = 2.f * Dot(N, wo);\n"
"    reflDir.x = k * N->x - wo->x;\n"
"    reflDir.y = k * N->y - wo->y;\n"
"    reflDir.z = k * N->z - wo->z;\n"
"\n"
"    // Ray from outside going in ?\n"
"    const bool into = (Dot(N, shadeN) > 0.f);\n"
"\n"
"    const float nc = mat->ousideIor;\n"
"    const float nt = mat->ior;\n"
"    const float nnt = into ? (nc / nt) : (nt / nc);\n"
"    const float ddn = -Dot(wo, shadeN);\n"
"    const float cos2t = 1.f - nnt * nnt * (1.f - ddn * ddn);\n"
"\n"
"	*diffuseBounce = false;\n"
"\n"
"    // Total internal reflection\n"
"    if (cos2t < 0.f) {\n"
"        *wi = reflDir;\n"
"        *pdf = 1.f;\n"
"\n"
"        f->r = mat->refl_r;\n"
"        f->g = mat->refl_g;\n"
"        f->b = mat->refl_b;\n"
"    } else {\n"
"        const float kk = (into ? 1.f : -1.f) * (ddn * nnt + sqrt(cos2t));\n"
"        Vector nkk = *N;\n"
"        nkk.x *= kk;\n"
"        nkk.y *= kk;\n"
"        nkk.z *= kk;\n"
"\n"
"        Vector transDir;\n"
"        transDir.x = -nnt * wo->x - nkk.x;\n"
"        transDir.y = -nnt * wo->y - nkk.y;\n"
"        transDir.z = -nnt * wo->z - nkk.z;\n"
"        Normalize(&transDir);\n"
"\n"
"        const float c = min(1.f, 1.f - (into ? -ddn : Dot(&transDir, N)));\n"
"\n"
"        const float R0 = mat->R0;\n"
"        const float Re = R0 + (1.f - R0) * c * c * c * c * c;\n"
"        const float Tr = 1.f - Re;\n"
"        const float P = .25f + .5f * Re;\n"
"\n"
"        if (Tr == 0.f) {\n"
"            if (Re == 0.f)\n"
"                *pdf = 0.f;\n"
"            else {\n"
"                *wi = reflDir;\n"
"                *pdf = 1.f;\n"
"\n"
"                f->r = mat->refl_r;\n"
"                f->g = mat->refl_g;\n"
"                f->b = mat->refl_b;\n"
"            }\n"
"        } else if (Re == 0.f) {\n"
"            *wi = transDir;\n"
"            *pdf = 1.f;\n"
"\n"
"            f->r = mat->refrct_r;\n"
"            f->g = mat->refrct_g;\n"
"            f->b = mat->refrct_b;\n"
"        } else if (RndFloatValue(seed) < P) {\n"
"            *wi = reflDir;\n"
"            *pdf = P / Re;\n"
"\n"
"            f->r = mat->refl_r / (*pdf);\n"
"            f->g = mat->refl_g / (*pdf);\n"
"            f->b = mat->refl_b / (*pdf);\n"
"        } else {\n"
"            *wi = transDir;\n"
"            *pdf = (1.f - P) / Tr;\n"
"\n"
"            f->r = mat->refrct_r / (*pdf);\n"
"            f->g = mat->refrct_g / (*pdf);\n"
"            f->b = mat->refrct_b / (*pdf);\n"
"        }\n"
"    }\n"
"}\n"
"\n"
// Samples a direction around the mirror reflection of *wo about shadeN.
//  wo:       outgoing direction.
//  wi:       receives the sampled direction.
//  exponent: cos(theta) of the offset from the mirror direction is
//            pow(1 - u1, exponent).
//  shadeN:   shading normal.
//  u0, u1:   random numbers; u0 selects the azimuth, u1 the polar offset.
// The helper axis used to build the frame around the mirror direction w is
// chosen from w itself: picking it from shadeN (as was done before) could
// select an axis parallel to w, making the cross product zero and the
// following Normalize produce NaNs. Assumes w has (approximately) unit length,
// which holds when wo and shadeN are normalized.
"void GlossyReflection(const Vector *wo, Vector *wi, const float exponent,\n"
"		const Vector *shadeN,\n"
"		const float u0, const float u1) {\n"
"    const float phi = 2.f * M_PI * u0;\n"
"    const float cosTheta = pow(1.f - u1, exponent);\n"
"    const float sinTheta = sqrt(1.f - cosTheta * cosTheta);\n"
"    const float x = cos(phi) * sinTheta;\n"
"    const float y = sin(phi) * sinTheta;\n"
"    const float z = cosTheta;\n"
"\n"
"    Vector w;\n"
"    const float RdotShadeN = Dot(shadeN, wo);\n"
"	w.x = (2.f * RdotShadeN) * shadeN->x - wo->x;\n"
"	w.y = (2.f * RdotShadeN) * shadeN->y - wo->y;\n"
"	w.z = (2.f * RdotShadeN) * shadeN->z - wo->z;\n"
"\n"
"    Vector u, a;\n"
"    if (fabs(w.x) > .1f) {\n"
"        a.x = 0.f;\n"
"        a.y = 1.f;\n"
"    } else {\n"
"        a.x = 1.f;\n"
"        a.y = 0.f;\n"
"    }\n"
"    a.z = 0.f;\n"
"    Cross(&u, &a, &w);\n"
"    Normalize(&u);\n"
"    Vector v;\n"
"    Cross(&v, &w, &u);\n"
"\n"
"    wi->x = x * u.x + y * v.x + z * w.x;\n"
"    wi->y = x * u.y + y * v.y + z * w.y;\n"
"    wi->z = x * u.z + y * v.z + z * w.z;\n"
"}\n"
"\n"
// Samples the metal material: *wi comes from GlossyReflection with the
// material exponent (two random values are drawn), *f is the material color,
// and *pdf is 1 when *wi lies on the shadeN side of the surface, else 0.
// *diffuseBounce is set to true.
// NOTE(review): the bounce is counted as diffuse although the caller keeps a
// separate specular/glossy counter - confirm this classification is intended.
"void Metal_Sample_f(const PARAM_MEM_TYPE MetalParam *mat, const Vector *wo, Vector *wi,\n"
"		float *pdf, Spectrum *f, const Vector *shadeN,\n"
"		Seed *seed,\n"
"		bool *diffuseBounce) {\n"
"	GlossyReflection(wo, wi, mat->exponent, shadeN, RndFloatValue(seed), RndFloatValue(seed));\n"
"\n"
"	f->r = mat->r;\n"
"	f->g = mat->g;\n"
"	f->b = mat->b;\n"
"\n"
"	*diffuseBounce = true;\n"
"\n"
"	*pdf =  (Dot(wi, shadeN) > 0.f) ? 1.f : 0.f;\n"
"}\n"
"\n"
// Samples the alloy material: a glossy reflection lobe and a diffuse lobe.
//  mat:           diffuse / reflection colors, glossy exponent and R0.
//  wo:            outgoing direction.
//  wi:            receives the sampled direction.
//  pdf:           receives the value the caller tests against 0.
//  f:             receives the color factor, already compensated for the
//                 probability of the chosen lobe.
//  shadeN:        shading normal.
//  seed:          RNG state; three values are always drawn.
//  diffuseBounce: receives the bounce classification used for the caller's
//                 bounce limits.
// Re = R0 + (1 - R0) * c^5 with c = 1 - dot(wo, shadeN); the glossy lobe is
// chosen with probability P = .25 + .5 * Re.
// The diffuse lobe uses the same cosine-hemisphere sampling as Matte_Sample_f
// and is therefore reported as a diffuse bounce, like Matte_Sample_f does.
// NOTE(review): the glossy lobe keeps the classification Metal_Sample_f uses
// (diffuse) - confirm whether glossy bounces should instead count against the
// specular/glossy limit.
// When the diffuse sample is rejected (*pdf == 0) wi, f and diffuseBounce are
// left unwritten; the caller must check *pdf first.
"void Alloy_Sample_f(const PARAM_MEM_TYPE AlloyParam *mat, const Vector *wo, Vector *wi,\n"
"		float *pdf, Spectrum *f, const Vector *shadeN,\n"
"		Seed *seed,\n"
"		bool *diffuseBounce) {\n"
"    // Schlick's approximation\n"
"    const float c = 1.f - Dot(wo, shadeN);\n"
"    const float R0 = mat->R0;\n"
"    const float Re = R0 + (1.f - R0) * c * c * c * c * c;\n"
"\n"
"    const float P = .25f + .5f * Re;\n"
"\n"
"	const float u0 = RndFloatValue(seed);\n"
"	const float u1 = RndFloatValue(seed);\n"
"\n"
"    if (RndFloatValue(seed) <= P) {\n"
"        GlossyReflection(wo, wi, mat->exponent, shadeN, u0, u1);\n"
"        *pdf = P / Re;\n"
"\n"
"        f->r = mat->refl_r / (*pdf);\n"
"        f->g = mat->refl_g / (*pdf);\n"
"        f->b = mat->refl_b / (*pdf);\n"
"\n"
"		*diffuseBounce = true;\n"
"    } else {\n"
"        Vector dir;\n"
"        CosineSampleHemisphere(&dir, u0, u1);\n"
"		const float dp = dir.z;\n"
"		// Using 0.0001 instead of 0.0 to cut down fireflies\n"
"		if (dp <= 0.0001f) {\n"
"			*pdf = 0.f;\n"
"			return;\n"
"		}\n"
"\n"
"        *pdf = INV_PI;\n"
"\n"
"        Vector v1, v2;\n"
"        CoordinateSystem(shadeN, &v1, &v2);\n"
"\n"
"        wi->x = v1.x * dir.x + v2.x * dir.y + shadeN->x * dir.z;\n"
"        wi->y = v1.y * dir.x + v2.y * dir.y + shadeN->y * dir.z;\n"
"        wi->z = v1.z * dir.x + v2.z * dir.y + shadeN->z * dir.z;\n"
"\n"
"		const float iRe = 1.f - Re;\n"
"		const float k = (1.f - P) / iRe;\n"
"		*pdf *= k;\n"
"\n"
"		const float dpk = dp / k;\n"
"		f->r = mat->diff_r * dpk;\n"
"		f->g = mat->diff_g * dpk;\n"
"		f->b = mat->diff_b * dpk;\n"
"\n"
"		*diffuseBounce = true;\n"
"	}\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// Init Kernel\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Kernel: seeds the random number generator of every task.
//  tasks: one GPUTask per pixel, indexed by global id.
// Work-items whose global id is outside the PARAM_SCREEN_WIDTH x
// PARAM_SCREEN_HEIGHT range return immediately. The generator is seeded with
// gid + 1 so that no work-item passes 0 to InitRandomGenerator.
"__kernel void Init(\n"
"		__global GPUTask *tasks\n"
"		) {\n"
"	const size_t gid = get_global_id(0);\n"
"	if (gid >= PARAM_SCREEN_WIDTH * PARAM_SCREEN_HEIGHT)\n"
"		return;\n"
"\n"
"	// Initialize the task\n"
"	__global GPUTask *task = &tasks[gid];\n"
"\n"
"	// Initialize random number generator\n"
"	Seed seed;\n"
"	InitRandomGenerator(gid + 1, &seed);\n"
"\n"
"	// Save the seed\n"
"	task->seed.s1 = seed.s1;\n"
"	task->seed.s2 = seed.s2;\n"
"	task->seed.s3 = seed.s3;\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// InitFB Kernel\n"
"//------------------------------------------------------------------------------\n"
"\n"
// Kernel: clears the frame buffer.
//  frameBuffer: one Pixel per screen pixel, indexed by global id.
// Work-items whose global id is outside the PARAM_SCREEN_WIDTH x
// PARAM_SCREEN_HEIGHT range return immediately; every other work-item sets its
// pixel to (0, 0, 0).
"__kernel void InitFB(\n"
"		__global Pixel *frameBuffer\n"
"		) {\n"
"	const size_t gid = get_global_id(0);\n"
"	if (gid >= PARAM_SCREEN_WIDTH * PARAM_SCREEN_HEIGHT)\n"
"		return;\n"
"\n"
"	__global Pixel *p = &frameBuffer[gid];\n"
"	p->r = 0.f;\n"
"	p->g = 0.f;\n"
"	p->b = 0.f;\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// PathTracing Kernel\n"
"//------------------------------------------------------------------------------\n"
"\n"
"__kernel void PathTracing(\n"
"		__global GPUTask *tasks,\n"
"		PARAM_MEM_TYPE BVHAccelArrayNode *bvhRoot,\n"
"		PARAM_MEM_TYPE Camera *camera,\n"
"		__global Spectrum *infiniteLightMap,\n"
"		__global Pixel *frameBuffer,\n"
"		PARAM_MEM_TYPE Material *mats,\n"
"		__global uint *sphereMats\n"
"#if defined(PARAM_HAS_TEXTUREMAPS)\n"
"		, PARAM_MEM_TYPE TexMap *texMaps\n"
"		, __global Spectrum *texMapRGB\n"
"		, PARAM_MEM_TYPE TexMapInstance *sphereTexMaps\n"
"#if defined (PARAM_HAS_BUMPMAPS)\n"
"		, PARAM_MEM_TYPE BumpMapInstance *sphereBumpMaps\n"
"#endif\n"
"#endif\n"
"		) {\n"
"	const size_t gid = get_global_id(0);\n"
"	if (gid >= PARAM_SCREEN_WIDTH * PARAM_SCREEN_HEIGHT)\n"
"		return;\n"
"\n"
"	__global GPUTask *task = &tasks[gid];\n"
"	const uint pixelIndex = gid;\n"
"\n"
"	// Read the seed\n"
"	Seed seed;\n"
"	seed.s1 = task->seed.s1;\n"
"	seed.s2 = task->seed.s2;\n"
"	seed.s3 = task->seed.s3;\n"
"\n"
"	Spectrum radiance;\n"
"	radiance.r = 0.f;\n"
"	radiance.g = 0.f;\n"
"	radiance.b = 0.f;\n"
"\n"
"	Ray ray;\n"
"	GenerateCameraRay(&seed, camera, pixelIndex, &ray);\n"
"\n"
"	Spectrum throughput;\n"
"	throughput.r = 1.f;\n"
"	throughput.g = 1.f;\n"
"	throughput.b = 1.f;\n"
"\n"
"	uint diffuseBounces = 0;\n"
"	uint specularGlossyBounces = 0;\n"
"\n"
"	for(;;) {\n"
"		PARAM_MEM_TYPE Sphere *hitSphere;\n"
"		uint sphereIndex;\n"
"		if (BVH_Intersect(&ray, &hitSphere, &sphereIndex, bvhRoot)) {\n"
"			const PARAM_MEM_TYPE Material *hitPointMat = &mats[sphereMats[sphereIndex]];\n"
"#if defined(PARAM_HAS_TEXTUREMAPS)\n"
"			const PARAM_MEM_TYPE TexMapInstance *hitTexMapInst = &sphereTexMaps[sphereIndex];\n"
"#if defined (PARAM_HAS_BUMPMAPS)\n"
"			const PARAM_MEM_TYPE BumpMapInstance *hitBumpMapInst = &sphereBumpMaps[sphereIndex];\n"
"#endif\n"
"#endif\n"
"\n"
"			Point hitPoint;\n"
"			hitPoint.x = ray.o.x + ray.maxt * ray.d.x;\n"
"			hitPoint.y = ray.o.y + ray.maxt * ray.d.y;\n"
"			hitPoint.z = ray.o.z + ray.maxt * ray.d.z;\n"
"\n"
"			Vector N;\n"
"			N.x = hitPoint.x - hitSphere->center.x;\n"
"			N.y = hitPoint.y - hitSphere->center.y;\n"
"			N.z = hitPoint.z - hitSphere->center.z;\n"
"			Normalize(&N);\n"
"\n"
"			Vector shadeN = N;\n"
"\n"
"#if defined (PARAM_HAS_BUMPMAPS)\n"
"			const uint bumpMapIndex = hitBumpMapInst->texMapIndex;\n"
"			if (bumpMapIndex != 0xffffffffu) {\n"
"				const float u0 = SphericalPhi(&N) * INV_TWOPI * hitBumpMapInst->scaleU + hitBumpMapInst->shiftU;\n"
"				const float v0 = SphericalTheta(&N) * INV_PI * hitBumpMapInst->scaleV + hitBumpMapInst->shiftV;\n"
"\n"
"				const PARAM_MEM_TYPE TexMap *tm = &texMaps[bumpMapIndex];\n"
"				const unsigned int width = tm->width;\n"
"				const unsigned int height = tm->height;\n"
"\n"
"				const float du = 1.f / width;\n"
"				const float dv = 1.f / height;\n"
"\n"
"				Spectrum col0;\n"
"				TexMap_GetColor(&texMapRGB[tm->rgbOffset], width, height, u0, v0, &col0);\n"
"				const float b0 = Spectrum_Filter(&col0);\n"
"\n"
"				Spectrum colu;\n"
"				TexMap_GetColor(&texMapRGB[tm->rgbOffset], width, height, u0 + du, v0, &colu);\n"
"				const float bu = Spectrum_Filter(&colu);\n"
"\n"
"				Spectrum colv;\n"
"				TexMap_GetColor(&texMapRGB[tm->rgbOffset], width, height, u0, v0 + dv, &colv);\n"
"				const float bv = Spectrum_Filter(&colv);\n"
"\n"
"				const float scale = hitBumpMapInst->scale;\n"
"				Vector bump;\n"
"				bump.x = scale * (bu - b0);\n"
"				bump.y = scale * (bv - b0);\n"
"				bump.z = 1.f;\n"
"\n"
"				Vector v1, v2;\n"
"				CoordinateSystem(&N, &v1, &v2);\n"
"\n"
"				shadeN.x = v1.x * bump.x + v2.x * bump.y + N.x * bump.z;\n"
"				shadeN.y = v1.y * bump.x + v2.y * bump.y + N.y * bump.z;\n"
"				shadeN.z = v1.z * bump.x + v2.z * bump.y + N.z * bump.z;\n"
"				Normalize(&shadeN);\n"
"			}\n"
"#endif\n"
"\n"
"			// Check if I have to flip the normal\n"
"			const bool flipNormal = (Dot(&N, &ray.d) > 0.f);\n"
"			if (flipNormal) {\n"
"				shadeN.x *= -1.f;\n"
"				shadeN.y *= -1.f;\n"
"				shadeN.z *= -1.f;\n"
"			}\n"
"\n"
"			uint matType = hitPointMat->type;\n"
"			radiance.r += throughput.r * hitPointMat->emi_r;\n"
"			radiance.g += throughput.g * hitPointMat->emi_g;\n"
"			radiance.b += throughput.b * hitPointMat->emi_b;\n"
"\n"
"			Vector wo;\n"
"			wo.x = -ray.d.x;\n"
"			wo.y = -ray.d.y;\n"
"			wo.z = -ray.d.z;\n"
"			Vector wi;\n"
"			float materialPdf;\n"
"			Spectrum f;\n"
"			bool diffuseBounce;\n"
"			switch (matType) {\n"
"\n"
"#if defined(PARAM_ENABLE_MAT_MATTE)\n"
"				case MAT_MATTE:\n"
"					Matte_Sample_f(&hitPointMat->param.matte, &wo, &wi, &materialPdf, &f, &shadeN, &seed, &diffuseBounce);\n"
"					break;\n"
"#endif\n"
"\n"
"#if defined(PARAM_ENABLE_MAT_MIRROR)\n"
"				case MAT_MIRROR:\n"
"					Mirror_Sample_f(&hitPointMat->param.mirror, &wo, &wi, &materialPdf, &f, &shadeN, &diffuseBounce);\n"
"					break;\n"
"#endif\n"
"\n"
"#if defined(PARAM_ENABLE_MAT_GLASS)\n"
"				case MAT_GLASS:\n"
"					Glass_Sample_f(&hitPointMat->param.glass, &wo, &wi, &materialPdf, &f, &N, &shadeN, &seed, &diffuseBounce);\n"
"					break;\n"
"#endif\n"
"\n"
"#if defined(PARAM_ENABLE_MAT_METAL)\n"
"				case MAT_METAL:\n"
"					Metal_Sample_f(&hitPointMat->param.metal, &wo, &wi, &materialPdf, &f, &shadeN, &seed, &diffuseBounce);\n"
"					break;\n"
"#endif\n"
"\n"
"#if defined(PARAM_ENABLE_MAT_ALLOY)\n"
"				case MAT_ALLOY:\n"
"					Alloy_Sample_f(&hitPointMat->param.alloy, &wo, &wi, &materialPdf, &f, &shadeN, &seed, &diffuseBounce);\n"
"					break;\n"
"#endif\n"
"\n"
"				default:\n"
"					// Houston, we have a problem...\n"
"					diffuseBounce = false;\n"
"					materialPdf = 0.f;\n"
"					break;\n"
"			}\n"
"\n"
"			if (materialPdf == 0.f)\n"
"				break;\n"
"\n"
"			if (diffuseBounce) {\n"
"				++diffuseBounces;\n"
"\n"
"				if (diffuseBounces > PARAM_MAX_DIFFUSE_BOUNCE)\n"
"					break;\n"
"			} else {\n"
"				++specularGlossyBounces;\n"
"\n"
"				if (specularGlossyBounces > PARAM_MAX_SPECULARGLOSSY_BOUNCE)\n"
"					break;\n"
"			}\n"
"\n"
"#if defined(PARAM_HAS_TEXTUREMAPS)\n"
"			const uint texMapIndex = hitTexMapInst->texMapIndex;\n"
"\n"
"			if (texMapIndex != 0xffffffffu) {\n"
"				const float tu = SphericalPhi(&N) * INV_TWOPI * hitTexMapInst->scaleU + hitTexMapInst->shiftU;\n"
"				const float tv = SphericalTheta(&N) * INV_PI * hitTexMapInst->scaleV + hitTexMapInst->shiftV;\n"
"\n"
"				PARAM_MEM_TYPE TexMap *tm = &texMaps[texMapIndex];\n"
"				Spectrum texCol;\n"
"				TexMap_GetColor(&texMapRGB[tm->rgbOffset], tm->width, tm->height, tu, tv, &texCol);\n"
"\n"
"				f.r *= texCol.r;\n"
"				f.g *= texCol.g;\n"
"				f.b *= texCol.b;\n"
"			}\n"
"#endif\n"
"\n"
"			throughput.r *= f.r;\n"
"			throughput.g *= f.g;\n"
"			throughput.b *= f.b;\n"
"\n"
"			ray.o = hitPoint;\n"
"			ray.d = wi;\n"
"			ray.mint = PARAM_RAY_EPSILON;\n"
"			ray.maxt = INFINITY;\n"
"		} else {\n"
"			Spectrum iLe;\n"
"			InfiniteLight_Le(infiniteLightMap, &iLe, &ray.d);\n"
"\n"
"			radiance.r += throughput.r * iLe.r;\n"
"			radiance.g += throughput.g * iLe.g;\n"
"			radiance.b += throughput.b * iLe.b;\n"
"			break;\n"
"		}\n"
"	}\n"
"\n"
"	/*if ((radiance.r < 0.f) || (radiance.g < 0.f) || (radiance.b < 0.f) ||\n"
"			isnan(radiance.r) || isnan(radiance.g) || isnan(radiance.b))\n"
"		printf(\"Error radiance: [%f, %f, %f]\\n\", radiance.r, radiance.g, radiance.b);*/\n"
"\n"
"	__global Pixel *p = &frameBuffer[pixelIndex];\n"
"	p->r += radiance.r * (1.f / PARAM_SCREEN_SAMPLEPERPASS);\n"
"	p->g += radiance.g * (1.f / PARAM_SCREEN_SAMPLEPERPASS);\n"
"	p->b += radiance.b * (1.f / PARAM_SCREEN_SAMPLEPERPASS);\n"
"\n"
"	// Save the seed\n"
"	task->seed.s1 = seed.s1;\n"
"	task->seed.s2 = seed.s2;\n"
"	task->seed.s3 = seed.s3;\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// Image filtering kernels\n"
"//------------------------------------------------------------------------------\n"
"\n"
"// Horizontal 3-tap blur over one row of PARAM_SCREEN_WIDTH pixels.\n"
"//\n"
"// src/dst point at the first pixel of the row. aF, bF and cF are the\n"
"// un-normalized weights of the left, center and right taps; each weight is\n"
"// divided by the sum of the taps actually used, so the two border pixels are\n"
"// blended from the two taps that have a source pixel.\n"
"void ApplyBlurFilterXR1(\n"
"		__global Pixel *src,\n"
"		__global Pixel *dst,\n"
"		const float aF,\n"
"		const float bF,\n"
"		const float cF\n"
"		) {\n"
"	// Sliding window over the row: left/center/right = src[x - 1], src[x], src[x + 1]\n"
"	Pixel left;\n"
"	Pixel center = src[0];\n"
"	Pixel right = src[1];\n"
"\n"
"	// First pixel: there is no left neighbour, normalize over center + right\n"
"	const float firstSum = bF + cF;\n"
"	const float firstCenterW = bF / firstSum;\n"
"	const float firstRightW = cF / firstSum;\n"
"	dst[0].r = firstCenterW * center.r + firstRightW * right.r;\n"
"	dst[0].g = firstCenterW * center.g + firstRightW * right.g;\n"
"	dst[0].b = firstCenterW * center.b + firstRightW * right.b;\n"
"\n"
"	// Interior pixels: full three tap kernel\n"
"	const float fullSum = aF + bF + cF;\n"
"	const float leftW = aF / fullSum;\n"
"	const float centerW = bF / fullSum;\n"
"	const float rightW = cF / fullSum;\n"
"\n"
"	for (unsigned int x = 1; x < PARAM_SCREEN_WIDTH - 1; ++x) {\n"
"		// Shift the window one pixel to the right\n"
"		left = center;\n"
"		center = right;\n"
"		right = src[x + 1];\n"
"\n"
"		// NOTE: the AMD OpenCL implementation has shown problems running this code\n"
"		dst[x].r = leftW * left.r + centerW * center.r + rightW * right.r;\n"
"		dst[x].g = leftW * left.g + centerW * center.g + rightW * right.g;\n"
"		dst[x].b = leftW * left.b + centerW * center.b + rightW * right.b;\n"
"	}\n"
"\n"
"	// Last pixel: there is no right neighbour, normalize over left + center\n"
"	const float lastSum = aF + bF;\n"
"	const float lastLeftW = aF / lastSum;\n"
"	const float lastCenterW = bF / lastSum;\n"
"	left = center;\n"
"	center = right;\n"
"\n"
"	__global Pixel *last = &dst[PARAM_SCREEN_WIDTH - 1];\n"
"	last->r = lastLeftW * left.r + lastCenterW * center.r;\n"
"	last->g = lastLeftW * left.g + lastCenterW * center.g;\n"
"	last->b = lastLeftW * left.b + lastCenterW * center.b;\n"
"}\n"
"\n"
"// Vertical 3-tap blur over one column of PARAM_SCREEN_HEIGHT pixels.\n"
"//\n"
"// src/dst point at the column's pixel in the first row; consecutive pixels of\n"
"// the column are PARAM_SCREEN_WIDTH elements apart. aF, bF and cF are the\n"
"// un-normalized weights of the previous-row, current-row and next-row taps;\n"
"// each weight is divided by the sum of the taps actually used, so the first\n"
"// and last rows are blended from the two taps that have a source pixel.\n"
"void ApplyBlurFilterYR1(\n"
"		__global Pixel *src,\n"
"		__global Pixel *dst,\n"
"		const float aF,\n"
"		const float bF,\n"
"		const float cF\n"
"		) {\n"
"	// Sliding window down the column: above/center/below\n"
"	Pixel above;\n"
"	Pixel center = src[0];\n"
"	Pixel below = src[PARAM_SCREEN_WIDTH];\n"
"\n"
"	// First row: there is no pixel above, normalize over center + below\n"
"	const float firstSum = bF + cF;\n"
"	const float firstCenterW = bF / firstSum;\n"
"	const float firstBelowW = cF / firstSum;\n"
"	dst[0].r = firstCenterW * center.r + firstBelowW * below.r;\n"
"	dst[0].g = firstCenterW * center.g + firstBelowW * below.g;\n"
"	dst[0].b = firstCenterW * center.b + firstBelowW * below.b;\n"
"\n"
"	// Interior rows: full three tap kernel\n"
"	const float fullSum = aF + bF + cF;\n"
"	const float aboveW = aF / fullSum;\n"
"	const float centerW = bF / fullSum;\n"
"	const float belowW = cF / fullSum;\n"
"\n"
"	for (unsigned int y = 1; y < PARAM_SCREEN_HEIGHT - 1; ++y) {\n"
"		// Offset of this column's pixel in row y\n"
"		const unsigned rowOffset = y * PARAM_SCREEN_WIDTH;\n"
"\n"
"		// Shift the window one row down\n"
"		above = center;\n"
"		center = below;\n"
"		below = src[rowOffset + PARAM_SCREEN_WIDTH];\n"
"\n"
"		// NOTE: the AMD OpenCL implementation has shown problems running this code\n"
"		dst[rowOffset].r = aboveW * above.r + centerW * center.r + belowW * below.r;\n"
"		dst[rowOffset].g = aboveW * above.g + centerW * center.g + belowW * below.g;\n"
"		dst[rowOffset].b = aboveW * above.b + centerW * center.b + belowW * below.b;\n"
"	}\n"
"\n"
"	// Last row: there is no pixel below, normalize over above + center\n"
"	const float lastSum = aF + bF;\n"
"	const float lastAboveW = aF / lastSum;\n"
"	const float lastCenterW = bF / lastSum;\n"
"	above = center;\n"
"	center = below;\n"
"\n"
"	__global Pixel *last = &dst[(PARAM_SCREEN_HEIGHT - 1) * PARAM_SCREEN_WIDTH];\n"
"	last->r = lastAboveW * above.r + lastCenterW * center.r;\n"
"	last->g = lastAboveW * above.g + lastCenterW * center.g;\n"
"	last->b = lastAboveW * above.b + lastCenterW * center.b;\n"
"}\n"
"\n"
"// Light horizontal blur: each work-item filters the image row selected by\n"
"// its global id. Work-items whose id is past the last row do nothing.\n"
"__kernel void ApplyBlurLightFilterXR1(\n"
"		__global Pixel *src,\n"
"		__global Pixel *dst\n"
"		) {\n"
"	const size_t row = get_global_id(0);\n"
"	if (row < PARAM_SCREEN_HEIGHT) {\n"
"		// Offset of the first pixel of this row\n"
"		const size_t rowStart = row * PARAM_SCREEN_WIDTH;\n"
"\n"
"		// Side taps weigh .15 against a center tap of 1 (ApplyBlurFilterXR1 normalizes them)\n"
"		ApplyBlurFilterXR1(src + rowStart, dst + rowStart, .15f, 1.f, .15f);\n"
"	}\n"
"}\n"
"\n"
"// Light vertical blur: each work-item filters the image column selected by\n"
"// its global id. Work-items whose id is past the last column do nothing.\n"
"__kernel void ApplyBlurLightFilterYR1(\n"
"		__global Pixel *src,\n"
"		__global Pixel *dst\n"
"		) {\n"
"	const size_t column = get_global_id(0);\n"
"	if (column < PARAM_SCREEN_WIDTH) {\n"
"		// Side taps weigh .15 against a center tap of 1 (ApplyBlurFilterYR1 normalizes them)\n"
"		ApplyBlurFilterYR1(src + column, dst + column, .15f, 1.f, .15f);\n"
"	}\n"
"}\n"
"\n"
"// Heavy horizontal blur: each work-item filters the image row selected by\n"
"// its global id. Work-items whose id is past the last row do nothing.\n"
"__kernel void ApplyBlurHeavyFilterXR1(\n"
"		__global Pixel *src,\n"
"		__global Pixel *dst\n"
"		) {\n"
"	const size_t row = get_global_id(0);\n"
"	if (row < PARAM_SCREEN_HEIGHT) {\n"
"		// Offset of the first pixel of this row\n"
"		const size_t rowStart = row * PARAM_SCREEN_WIDTH;\n"
"\n"
"		// Side taps weigh .35 against a center tap of 1 (ApplyBlurFilterXR1 normalizes them)\n"
"		ApplyBlurFilterXR1(src + rowStart, dst + rowStart, .35f, 1.f, .35f);\n"
"	}\n"
"}\n"
"\n"
"// Heavy vertical blur: each work-item filters the image column selected by\n"
"// its global id. Work-items whose id is past the last column do nothing.\n"
"__kernel void ApplyBlurHeavyFilterYR1(\n"
"		__global Pixel *src,\n"
"		__global Pixel *dst\n"
"		) {\n"
"	const size_t column = get_global_id(0);\n"
"	if (column < PARAM_SCREEN_WIDTH) {\n"
"		// Side taps weigh .35 against a center tap of 1 (ApplyBlurFilterYR1 normalizes them)\n"
"		ApplyBlurFilterYR1(src + column, dst + column, .35f, 1.f, .35f);\n"
"	}\n"
"}\n"
"\n"
"// Horizontal pass of the 3-tap box filter: each work-item filters the image\n"
"// row selected by its global id. Work-items whose id is past the last row do\n"
"// nothing.\n"
"//\n"
"// A box filter weighs all taps equally, so the three weights are 1/3 each,\n"
"// the same values used by the vertical pass ApplyBoxFilterYR1. (This kernel\n"
"// previously used the .35/1/.35 weights of the heavy blur.)\n"
"__kernel void ApplyBoxFilterXR1(\n"
"		__global Pixel *src,\n"
"		__global Pixel *dst\n"
"		) {\n"
"	const size_t gid = get_global_id(0);\n"
"	if (gid >= PARAM_SCREEN_HEIGHT)\n"
"		return;\n"
"\n"
"	// Move to the first pixel of this work-item's row\n"
"	src += gid * PARAM_SCREEN_WIDTH;\n"
"	dst += gid * PARAM_SCREEN_WIDTH;\n"
"\n"
"	// Equal weights for the left, center and right taps\n"
"	const float aF = 1.f / 3.f;\n"
"	const float bF = 1.f / 3.f;\n"
"	const float cF = 1.f / 3.f;\n"
"\n"
"	ApplyBlurFilterXR1(src, dst, aF, bF, cF);\n"
"}\n"
"\n"
"// Vertical pass of the 3-tap box filter: each work-item filters the image\n"
"// column selected by its global id. Work-items whose id is past the last\n"
"// column do nothing.\n"
"__kernel void ApplyBoxFilterYR1(\n"
"		__global Pixel *src,\n"
"		__global Pixel *dst\n"
"		) {\n"
"	const size_t column = get_global_id(0);\n"
"	if (column < PARAM_SCREEN_WIDTH) {\n"
"		// Box filter: the three taps share the same weight\n"
"		const float tapWeight = 1.f / 3.f;\n"
"\n"
"		ApplyBlurFilterYR1(src + column, dst + column, tapWeight, tapWeight, tapWeight);\n"
"	}\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// BlendBuffer Kernel\n"
"//------------------------------------------------------------------------------\n"
"\n"
"// Blends src into dst in place, one pixel per work-item:\n"
"//   dst = (1 - blendFactorSrc) * dst + blendFactorSrc * src\n"
"// Work-items whose id is past the last pixel do nothing.\n"
"__kernel void BlendFrame(\n"
"		__global Pixel *src,\n"
"		__global Pixel *dst,\n"
"		const float blendFactorSrc) {\n"
"	const int gid = get_global_id(0);\n"
"	if (gid < PARAM_SCREEN_WIDTH * PARAM_SCREEN_HEIGHT) {\n"
"		// Read both inputs before dst is overwritten\n"
"		const Pixel incoming = src[gid];\n"
"		const Pixel current = dst[gid];\n"
"\n"
"		const float blendFactorDst = 1.f - blendFactorSrc;\n"
"		dst[gid].r = blendFactorDst * current.r + blendFactorSrc * incoming.r;\n"
"		dst[gid].g = blendFactorDst * current.g + blendFactorSrc * incoming.g;\n"
"		dst[gid].b = blendFactorDst * current.b + blendFactorSrc * incoming.b;\n"
"	}\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// Linear Tone Map Kernel\n"
"//------------------------------------------------------------------------------\n"
"\n"
"// Linear tone mapping, one pixel per work-item: every channel of src is\n"
"// multiplied by PARAM_TM_LINEAR_SCALE and stored in dst.\n"
"// Work-items whose id is past the last pixel do nothing.\n"
"__kernel void ToneMapLinear(\n"
"		__global Pixel *src,\n"
"		__global Pixel *dst) {\n"
"	const int gid = get_global_id(0);\n"
"	if (gid < PARAM_SCREEN_WIDTH * PARAM_SCREEN_HEIGHT) {\n"
"		const Pixel in = src[gid];\n"
"\n"
"		dst[gid].r = PARAM_TM_LINEAR_SCALE * in.r;\n"
"		dst[gid].g = PARAM_TM_LINEAR_SCALE * in.g;\n"
"		dst[gid].b = PARAM_TM_LINEAR_SCALE * in.b;\n"
"	}\n"
"}\n"
"\n"
"//------------------------------------------------------------------------------\n"
"// UpdatePixelBuffer Kernel\n"
"//------------------------------------------------------------------------------\n"
"\n"
"// Converts one radiance channel to an integer in [0, 255]: the value is\n"
"// clamped to [0, 1], raised to 1 / PARAM_GAMMA, scaled by 255 and rounded by\n"
"// adding .5 before the truncating cast.\n"
"uint Radiance2PixelUInt(const float x) {\n"
"	const float clamped = clamp(x, 0.f, 1.f);\n"
"\n"
"	return (uint)(pow(clamped, 1.f / PARAM_GAMMA) * 255.f + .5f);\n"
"}\n"
"\n"
"// Converts the frame buffer to packed 32 bit pixels, one pixel per work-item.\n"
"// Each channel goes through Radiance2PixelUInt and the results are packed as\n"
"// red in bits 0-7, green in bits 8-15 and blue in bits 16-23.\n"
"// Work-items whose id is past the last pixel do nothing.\n"
"__kernel void UpdatePixelBuffer(\n"
"		__global Pixel *frameBuffer,\n"
"		__global uint *pbo) {\n"
"	const int gid = get_global_id(0);\n"
"	if (gid < PARAM_SCREEN_WIDTH * PARAM_SCREEN_HEIGHT) {\n"
"		__global Pixel *pixel = &frameBuffer[gid];\n"
"\n"
"		const uint red = Radiance2PixelUInt(pixel->r);\n"
"		const uint green = Radiance2PixelUInt(pixel->g);\n"
"		const uint blue = Radiance2PixelUInt(pixel->b);\n"
"\n"
"		pbo[gid] = red | (green << 8) | (blue << 16);\n"
"	}\n"
"}\n"
;
#endif
